clear all
	set varabbrev off
	set more off
	set rmsg on, permanently
	*NOTE: rmsg prints a return message (elapsed time and return code) after each command; "permanently" keeps that setting for future Stata sessions as well.
	
	*Install the user-written commands this script calls, but only when they are not already on the ado-path:
	foreach pkg in zipsave tolong {
		
		capture which `pkg'
		
		if _rc {
		
			ssc install `pkg'
			
		}
		
	}
	*NOTE: The zipsave package supplies the zipsave/zipuse/zipappend commands used below; tolong is called in the address-match step. 
		* Checking with -which- first means the script does not need to reach SSC on every run.
	
	
/*******************************************************	
***SET DIRECTORIES AND OPTIONS
*******************************************************/

	*Configuration: working directory, extract label, file date, and match switches.
	
	*NOTE: The personal/project sub-folder within the LEO CRC folder where the output from 1_make_extract.do was saved; may require updating.
	global mydirectory "/afs/crc.nd.edu/group/LEO/infutor_downes_working/extractrequests/LosAngeles_CA/"
	
	*NOTE: Label for the geographic area the Infutor extract was pulled for. Can be a county, state, etc. It does not have to exactly match any state abbreviation or FIPS code convention.
		* E.g., Colorado = "CO", Santa Clara County = "SantaClara_CA"
	global geo "LIHTC"
	
	*NOTE: File date in MMYY format; it becomes part of every output file name.
	global filedate "0422"
	
	*NOTE: Set to 1 if doing a match to SSN using Infutor PIDs; set to 0 otherwise.
	local ssnmatch 0
	
	*NOTE: Set to 1 if doing a match using address fields; set to 0 otherwise.
	local addressmatch 0
	
	

	
	
	
	
	
/*******************************************************	
***CREATE INFUTOR EXTRACT FROM STATE FILES
*******************************************************/
	

	cd "$mydirectory"
	*NOTE: Every state file read below is output from 1_make_extract.do.
	
	
***(1) Load Alabama's first file, then append the first file of every other state:
	local remaining_states AK AZ AR CA CO CT DE DC FL GA HI ID IL IN IA KS KY LA ME MD MA MI MN MS MO MT NE NV NH NJ NM NY NC ND OH OK OR PA PR RI SC SD TN TX UT VT VA WA WV WI WY
	
	zipuse AL1_cleaned.dta.zip, clear
	
	foreach st of local remaining_states {
		
		zipappend using `st'1_cleaned.dta.zip
		
	}
	
	
	
	
***(2) Append the additional files from the big states which had to be split up:
	local all_states AL AK AZ AR CA CO CT DE DC FL GA HI ID IL IN IA KS KY LA ME MD MA MI MN MS MO MT NE NV NH NJ NM NY NC ND OH OK OR PA PR RI SC SD TN TX UT VT VA WA WV WI WY
	
	*The outer loop walks the part numbers (2, then 3); the inner loop walks the states.
	*A state with no file for the current part is skipped rather than raising an error.
	foreach part of numlist 2 3 {
		
		foreach st of local all_states {
			
			capture confirm file `st'`part'_cleaned.dta.zip
			
			if _rc == 0 {
				
				zipappend using `st'`part'_cleaned.dta.zip
				
			}
			
		}
		
	}
	
	
	
/*******************************************************	
***SAVE VARIOUS FORMS OF THE MASTER EXTRACT
*******************************************************/
	
	
	
***(1) Save the master file:
	*NOTE: compress shrinks variable storage types where it can do so without changing any values, so the saved file is smaller.
	compress
	*NOTE: The file name is built from the geo and filedate globals; replace overwrites any copy left by an earlier run.
	zipsave ${geo}_ever_${filedate}, replace
	
	
	
	
***(2) Save the PID-only file (written only when an SSN match is requested):
	
	if `ssnmatch' == 1 {
	
		*Work on a snapshot so the full extract is back in memory after the save:
		preserve
		
			*Reduce to the PID variables and keep one row per distinct combination:
			keep PID*
			duplicates drop PID*, force
			
			*NOTE: Saved as a plain dta file because the describe command will be run on it once it's downloaded to the local drive. It won't be very big since it's just PIDs.
			save "${geo}_ever_${filedate}_pids.dta", replace
		
		restore
	
	}
	
	
	

	
	
***(3) Save the PID-to-address mapping and the list of unique addresses:
	
	if `addressmatch' == 1 {
		
		*The four fields that together identify one unique address:
		local addr_key complete_address city state zip
			
		*Keep the PIDs and the wide address fields, then reshape to one row per PID and address sequence number:
		keep PID* complete_address* city* state* zip* dwell_type* 
		*Further wide fields that are not currently kept: add_num* pre_direction* street_name* street_suffix* post_direction* apartment_name* apartment_number* zip_plus* fips_county* effective_date* orig_file_date* last_verif_date*
		
		tolong complete_address# city# state# zip# dwell_type#, i(PID_) j(add_seq)
		*Further stubs that are not currently reshaped: add_num# pre_direction# street_name# street_suffix# post_direction# apartment_name# apartment_number# zip_plus# fips_county# effective_date# orig_file_date# last_verif_date#
		
		*The zip* pattern in the keep above also catches the zip_plus variables; remove them here:
		drop zip_plus*
		
		*NOTE: Rows without an address can't be geocoded.
		keep if !missing(complete_address)
		
		*NOTE: ArcGIS can't geocode PO boxes (dwell_type "P"); the flag is not needed afterwards.
		keep if dwell_type != "P"
		drop dwell_type
		
		*Build the lookup of unique addresses on a snapshot of the data:
		preserve 
		
			keep `addr_key'
			duplicates drop `addr_key', force 
			
			*NOTE: address_id identifies unique addresses at the address-city-state-zip level. This is the lowest geospatial level relevant for geocoding and thus for linking to LEO project data. 
			gen long address_id = _n
			
			tempfile uniqueaddresses
			save "`uniqueaddresses'", replace
		
		restore
		
		*Attach address_id to every PID / add_seq row:
		keep PID_ add_seq `addr_key'
		
		merge m:1 `addr_key' using "`uniqueaddresses'", keep(match) nogenerate
		
		*Save a mapping of PID and add_seq : address_id
		preserve
		
			keep PID_ add_seq address_id
			rename PID_ PID
			
			*NOTE: Will need this later when we merge geocoded data back in with the master file. 
			save "${geo}_ever_${filedate}_idmap.dta", replace
		
		restore
		
		*Reduce to one row per unique address, carrying its address_id:
		keep `addr_key' address_id 
		duplicates drop `addr_key' address_id, force
			
		*Save unique addresses in CSV file:
		*NOTE: Very important to save as CSV, not zipped or dta file. Much easier to download to local drive + input to ArcGIS as CSV.
		export delimited using "${geo}_ever_${filedate}_geo_pre", replace
	
	}
	
	
	
	
	
	
